# coding=utf-8
# @Author : zcy
# @File : count_word.py
# @SoftWare : PyCharm

import jieba
from collections import Counter
import wordcloud
import regex as re
from PIL import Image
import numpy as np


# 统计词频
def get_word_count(textpath, stopwords=None):
    """Count how often each multi-character word occurs in a UTF-8 text file.

    Every line has ASCII letters, digits, spaces and a few Chinese
    punctuation marks removed, and is then segmented with
    ``jieba.cut(..., cut_all=False)``.

    Args:
        textpath: Path of the text file to read; it is decoded as UTF-8.
        stopwords: Optional iterable of words to leave out of the count.
            ``None`` means nothing is excluded.

    Returns:
        A ``collections.Counter`` mapping each kept word to its number of
        occurrences. Only tokens longer than one character are counted.
    """
    # A frozenset gives O(1) membership tests; stop-word lists can be long
    # and the test runs once per token.
    excluded = frozenset(stopwords) if stopwords is not None else frozenset()

    # Raw string: the pattern text is unchanged, but the literal no longer
    # relies on invalid string escapes (a SyntaxWarning on recent Pythons).
    # Compiled once instead of being looked up for every line.
    noise = re.compile(r"[A-Za-z0-9\：\·\—\，\。\“ \”]")

    all_words = []
    print("获取文件内容...")
    # The context manager closes the file even if segmentation raises.
    with open(textpath, encoding='utf-8') as text_file:
        for line in text_file:
            # str.strip returns a new string, so the result must be kept.
            line = line.strip('\n')
            line = noise.sub("", line)  # drop noise characters
            all_words.extend(jieba.cut(line, cut_all=False))

    print("统计词频...")
    # Keep tokens longer than one character that are neither a CRLF pair
    # (which also has length 2) nor a stop word.
    return Counter(
        word for word in all_words
        if len(word) > 1 and word != '\r\n' and word not in excluded
    )


def main():
    """Build a word cloud image from the word frequencies of a text file.

    Reads stop words from ``stop_word.txt`` (one per line), counts words in
    ``douluodalu.txt`` with ``get_word_count``, renders them inside the shape
    given by ``mask.jpg`` using the ``STXINGKA.TTF`` font, and writes the
    result to ``./word_cloud.jpg``. All paths are relative to the current
    working directory.
    """
    # Read-only mode is enough here ('r+' would also demand write access),
    # and the context manager guarantees the handle is closed.
    with open('stop_word.txt', 'r', encoding='utf-8') as stop:
        stop_words = stop.read().split("\n")

    text_file_path = "douluodalu.txt"
    font_path = "STXINGKA.TTF"
    target_image_path = "./word_cloud.jpg"

    counter = get_word_count(text_file_path, stop_words)

    print("生成词云...")
    # Copy the pixels into an array while the image file is open, then let
    # the context manager release the file handle.
    with Image.open('mask.jpg') as mask_image:
        mask = np.array(mask_image)

    word_cloud = wordcloud.WordCloud(
        mask=mask,
        background_color="white",
        scale=3,
        font_path=font_path,
    ).generate_from_frequencies(counter)
    # Optional: colour the words from the mask image instead of the default.
    # image_colors = wordcloud.ImageColorGenerator(mask)
    # word_cloud.recolor(color_func=image_colors)

    print("保存图片...")
    word_cloud.to_file(target_image_path)  # write the rendered cloud to disk


# Run the word-cloud pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

